********************************************************************************************************
* Study Title: Social Exclusion and Political Identity: The Case of Asian American Partisanship
* Replication File for Observational Data
* Date: May 9, 2016
* Written By: Alexander Kuo, Neil Malhotra, and Cecilia Hyunjung Mo
* Dataset: 31481-0001-Data.dta
* Dataset and Codebook are available at: http://naasurvey.com/data/
********************************************************************************************************

* Load the survey file, discarding whatever is currently in memory
use "31481-0001-Data.dta", clear




********************************************************************************************************
********************************************************************************************************
****                                           CLEAN DATA                                           **** 
********************************************************************************************************
********************************************************************************************************

set more off

***PARTY IDENTIFICATION***
* partyid: a value-labelled copy of the raw party question QD1,
* positioned immediately before QD1 in the variable list
generate partyid = QD1
label define partyid 1 "Republican" 2 "Democrat"
label define partyid 3 "Independent" 4 "Other", add
label values partyid partyid
tabulate partyid
order partyid, before(QD1)
tabulate QD1

* Follow-up items on partisan strength / leaning.
* Each is copied from the raw item and its code 2 is turned into 0;
* every other code (including missing) is carried over unchanged.

*Strong Republicans (QD1A): 1 = strong, 0 = not strong
generate strongrep = QD1A
replace strongrep = 0 if strongrep == 2
label variable strongrep "Strong Republican?"
order strongrep, before(QD1A)

*Strong Democrats (QD1B): 1 = strong, 0 = not strong
generate strongdem = QD1B
replace strongdem = 0 if strongdem == 2
label variable strongdem "Strong Democrat?"
order strongdem, before(QD1B)

*Independents (QD1C): 1 = leans Republican, 0 = leans Democrat
generate liberal_leaning_rep = QD1C
replace liberal_leaning_rep = 0 if liberal_leaning_rep == 2
label define liberal_leaning_rep 0 "Lean Dem" 1 "Lean Rep"
label values liberal_leaning_rep liberal_leaning_rep
order liberal_leaning_rep, before(QD1C)

*6-point partisanship scale (1 = strong Democrat ... 6 = strong Republican)
* The nesting order gives the Republican-strength item the highest priority,
* then the leaning item, then the Democrat-strength item; anything that
* matches none of the six conditions is left missing.
generate pID = cond(strongrep == 1, 6,           ///
               cond(strongrep == 0, 5,           ///
               cond(liberal_leaning_rep == 1, 4, ///
               cond(liberal_leaning_rep == 0, 3, ///
               cond(strongdem == 0, 2,           ///
               cond(strongdem == 1, 1, .))))))

label variable pID "Party Identification Scale"
label define pID 1 "Strong Democrat" 2 "Not very strong Democrat" 3 "Independent Leaning Democrat" 4 "Independent Leaning Republican" 5 "Not very strong Republican" 6 "Strong Republican"
label values pID pID
tabulate pID, missing

* Reverse and rescale to the unit interval: 1 = strong Democrat, 0 = strong Republican
generate pid = (6 - pID) / 5
label variable pid "Party Identification Scale (Recoded; High: Democrat)"
* place pid and then pID directly in front of QD1
order pid pID, before(QD1)
tabulate pid, missing

* pid_miss: 1 when pid is system missing, 0 otherwise
generate pid_miss = (pid == .)
tabulate pid_miss

* pidx: pid with system-missing cases placed at the scale midpoint (.5)
generate pidx = cond(pid == ., .5, pid)
tabulate pidx, missing
label variable pidx "missing coded as .5"

* dem: 1 for the three Democratic categories of pID (1-3), 0 for everyone else,
* including respondents with no pID
generate dem = inlist(pID, 1, 2, 3)
tabulate dem
order dem, before(pID)

* demx: Democrats (pID 1-3 = 1) versus Republicans (pID 4-6 = 0);
* respondents without a pID stay missing
generate demx = cond(inlist(pID, 1, 2, 3), 1, cond(inlist(pID, 4, 5, 6), 0, .))
tabulate demx
order demx, before(pID)
label variable demx "Democrat compared to Republican"

* pidcat: three-way party category -- 2 = Democratic side (pID 1-3),
* 1 = Republican side (pID 4-6), 0 = all remaining respondents
generate pidcat = cond(inlist(pID, 4, 5, 6), 1, cond(inlist(pID, 1, 2, 3), 2, 0))
label variable pidcat "2=lean Dem, 1=lean Rep, 0=everyone else"
tabulate pidcat, missing


***SOCIAL EXCLUSION***
* The eleven discrimination / hate-crime items used for the summary indicators
local excl_items QF5_A QF5_B QF5_C QF5_D QF5_E QF5A_A QF5A_B QF5A_C QF5A_D QF5A_E QF6

* victim: 1 if any item is answered 1, otherwise 0
* (any other answer, and missing data, therefore count as 0)
generate victim = 0
foreach q of local excl_items {
    replace victim = 1 if `q' == 1
}
tabulate victim, missing
order victim, before(QF5_A)
label variable victim "suffered any racial aggression, missing=0"

* victim_miss: 1 if any item has a code above 97
* NOTE(review): in Stata a missing value compares greater than any number, so
* an item that is system missing also satisfies "> 97" -- confirm that this is
* the intended treatment for items a respondent was never asked.
generate victim_miss = 0
foreach q of local excl_items {
    replace victim_miss = 1 if `q' > 97
}
tabulate victim_miss, missing

*QA4 nativity
* usborn: QA4 with codes 2 and 98-99 set to 0 (code 1 is kept as 1)
generate usborn = QA4
replace usborn = 0 if usborn == 2 | inrange(usborn, 98, 99)
tabulate usborn, missing
order usborn, before(QA4)
tabulate QA4

* forborn: mirror image of usborn -- QA4 code 2 becomes 1; codes 1 and 98-99 become 0;
* any other value is carried over as is
generate forborn = cond(QA4 == 1, 0, cond(QA4 == 2, 1, cond(inrange(QA4, 98, 99), 0, QA4)))
tabulate forborn, missing
order forborn, before(QA4)

* Flags for the 98/99 codes that were folded into 0 above
generate usborn_miss = inlist(QA4, 98, 99)
tabulate usborn_miss

generate forborn_miss = inlist(QA4, 98, 99)
tabulate forborn_miss

*Binary indicators that treat missing data as missing; missing values are NOT recoded to zero*
* Each experience is asked in two parallel items, QF5_<letter> and QF5A_<letter>.
* The indicator is 1 if either item is answered 1, 0 if either is answered 2 (and
* neither is 1), and missing otherwise. The companion *_miss flag is 1 when the
* item paired with the respondent's usborn value has a code above 2 (Stata
* missing values also compare greater than 2).
local letters A B C D E
local stems   job promote police house service

forvalues i = 1/5 {
    local ltr  : word `i' of `letters'
    local stem : word `i' of `stems'

    generate `stem' = .
    replace  `stem' = 0 if QF5_`ltr' == 2 | QF5A_`ltr' == 2
    replace  `stem' = 1 if QF5_`ltr' == 1 | QF5A_`ltr' == 1
    tabulate `stem', missing

    generate `stem'_miss = 0
    replace  `stem'_miss = 1 if (QF5_`ltr' > 2 & usborn == 0) | (QF5A_`ltr' > 2 & usborn == 1)
    tabulate `stem'_miss
}

label variable job     "unfairly denied job"
label variable promote "unfairly denied promotion"
label variable police  "unfairly treated by police"
label variable house   "unfairly denied housing"
label variable service "unfairly treated in service ind"

* Hate crime comes from the single item QF6
generate hate = .
replace  hate = 0 if QF6 == 2
replace  hate = 1 if QF6 == 1
tabulate hate, missing
label variable hate "suffered hate crime"

generate hate_miss = (QF6 > 2)
tabulate hate_miss

*Sum of binary victim scores
* NOTE(review): "+" returns missing as soon as one component is missing, and
* those cases are then set to 0 below -- so a respondent with a reported
* incident but one unanswered item ends up with a sum of 0. Confirm intended.
generate victimsum = job + promote + house + police + service + hate
tabulate victimsum, missing
replace victimsum = 0 if victimsum == .


***CONTROLS***

*Gender of respondent (GENDER)
* female: GENDER code 2 -> 1; codes 1 and 9 -> 0; any other value is carried over
generate female = cond(GENDER == 2, 1, cond(inlist(GENDER, 1, 9), 0, GENDER))
label variable female "Female"
label define female 0 "Male" 1 "Female"
label values female female
order female, before(GENDER)

* fem_miss flags the GENDER == 9 cases that were set to 0 above
generate fem_miss = (GENDER == 9)
tabulate fem_miss

*QJ6 Pre-tax household income last year
* inc: QJ6 with codes 98-99 blanked, rescaled as (code - 1) / 7, and with
* system-missing cases finally set to 0 (they are flagged by inc_miss below)
generate inc = QJ6
replace inc = . if inrange(inc, 98, 99)
tabulate inc, missing
tabulate QJ6
order inc, before(QJ6)
replace inc = (inc - 1) / 7
replace inc = 0 if inc == .
tabulate inc, missing

* inc2: QJ6 on its original coding, with codes 98-99 left missing
generate inc2 = QJ6
replace inc2 = . if inrange(inc2, 98, 99)

* inc_miss: 1 for the QJ6 codes 98 and 99
generate inc_miss = inlist(QJ6, 98, 99)
tabulate inc_miss

*Recode education*
* ed  : five-category education collapsed from QJ1, later rescaled to 0-1 with
*       missing set to 0 (flagged by ed_miss)
* ed2 : the same five categories kept on the 1-5 coding, missing left missing
tab QJ1
gen ed=.
replace ed=QJ1
recode ed 1/2=1 3=2 4=3 5=4 6/10=5 11=. 98/99=.
tab ed

gen ed2=.
replace ed2=QJ1
recode ed2 1/2=1 3=2 4=3 5=4 6/10=5 11=. 98/99=.
tab ed2

label define ed 1 "Did not graduate from high school" 2 "High School graduate" 3 "Some college, but no degree (yet)" 4 "4-year college degree" 5 "Postgraduate degree (MA, MBA, MD, JD, PhD, etc.)"
label define ed2 1 "Did not graduate from high school" 2 "High School graduate" 3 "Some college, but no degree (yet)" 4 "4-year college degree" 5 "Postgraduate degree (MA, MBA, MD, JD, PhD, etc.)"
label value ed  ed
* FIX: the ed2 label was previously attached to ed a second time, leaving ed2
* without any value label
label value ed2 ed2
tab ed
replace ed=(ed-1)/4
* FIX: once ed is on the 0-1 scale the 1-5 value label no longer applies
* (ed==1 is now the top category but would display the text for code 1),
* so detach it
label value ed .
tab ed, missing
recode ed .=0
tab ed, missing
label var ed "Level of Education"

* ed_miss: 1 for the QJ1 codes (11, 98, 99) that were blanked above
gen ed_miss=0
replace ed_miss=1 if QJ1==11
replace ed_miss=1 if QJ1==98
replace ed_miss=1 if QJ1==99
tab ed_miss, missing

*Year Born / Age*
tabulate QJ10
label variable QJ10 "Year Born (RAW)"

* ageyears: 2013 - (birth year + 1), only for birth years from 1912 up to (not including) 9999
generate ageyears = 2013 - (QJ10 + 1) if QJ10 >= 1912 & QJ10 < 9999
tabulate ageyears, missing

* age: ageyears shifted by 22 and divided by 78; cases with QJ10 above 1990
* or missing are set to 0
generate age = (ageyears - 22) / 78
tabulate age, missing
replace age = 0 if QJ10 > 1990 | QJ10 == .
tabulate age, missing

* age_miss: 1 when QJ10 is above 1990 (true for Stata missing values as well)
generate age_miss = (QJ10 > 1990)
tabulate age_miss

* Squared age in years, and a rescaled version built from it
generate ageyears2 = ageyears^2
tabulate ageyears2, missing

generate age2 = (ageyears2 - 484) / 9516
tabulate age2, missing
replace age2 = 0 if QJ10 > 1990
tabulate age2, missing

* age2_new: square of the rescaled age variable (the version used in the models)
generate age2_new = age * age

*Code cohort/time spent in the US*
tabulate QA6 if usborn == 1

* yearsus: starts as a copy of QA6 with codes 97-99 blanked; where QA6 lies
* strictly between 1923 and 9999 it is replaced by 2013 - (QA6 + 1)
generate yearsus = QA6
replace yearsus = . if inlist(yearsus, 97, 98, 99)

replace yearsus = 2013 - (QA6 + 1) if QA6 > 1923 & QA6 < 9999
tabulate yearsus, missing

* Share of life spent in the US, where both pieces are available.
* Note that 10-15 people report that they moved to the US before they were
* born; their ratio exceeds 1 and is treated as missing as well.
generate lifeinus = yearsus / ageyears
tabulate lifeinus, missing
replace lifeinus = . if lifeinus > 1 & lifeinus < .
tabulate lifeinus, missing

* US-born respondents are set to 1 (100% of life in the US)
replace lifeinus = 1 if usborn == 1
tabulate lifeinus, missing

* cohort_miss: flags cases whose share is still missing at this point
generate cohort_miss = (lifeinus == .)
tabulate cohort_miss
label variable cohort_miss "missing data for time in US"

* Missing shares are set to 0 and the variable is renamed to cohort
replace lifeinus = 0 if lifeinus == .
tabulate lifeinus

rename lifeinus cohort
label variable cohort "percentage of life spent in US"

*QH1 What is your religious background?
* relig1: 0 when QH1 is above 32 (which includes Stata missing values), 1 otherwise
generate relig1 = !(QH1 > 32)
tabulate relig1
order relig1, before(QH1)

*QJ13 Visa, green card, or U.S. citizen?
* citizen: QJ13 code 3 -> 1; codes 1-2 and 97-99 -> 0; other values carried over
generate citizen = cond(inrange(QJ13, 1, 2) | inrange(QJ13, 97, 99), 0, cond(QJ13 == 3, 1, QJ13))
order citizen, before(QJ13)
tabulate citizen
tabulate QJ13

*ideology scale
* ideo: 6-point scale assembled from the branching items QG4A, QG4C and QG4B
* (see the value label below); respondents matching none of the six
* conditions stay missing
gen ideo=.
replace ideo=1 if QG4A==1
replace ideo=2 if QG4A==2
replace ideo=3 if QG4C==1
replace ideo=4 if QG4C==2
replace ideo=5 if QG4B==2
replace ideo=6 if QG4B==1
label define ideo 1 "Strong liberal" 2 "Not so strong liberal" 3 "Lean liberal" 4 "Lean conservative" 5 "Not so strong conservative" 6 "Strong Conservative" 
tab ideo
move ideo QG4A
label value ideo ideo

*inverting & rescaling (1: extremely liberal); missing is then set to 0
gen ideo1=(6-ideo)/5
label var ideo1 "Ideology (Recoded; High: Liberal)"
move ideo1 QG4
recode ideo1 .=0
tab ideo1, missing

* ideo1_bi: 1 when the ideology scale is missing, 0 otherwise.
* FIX: this used to be derived from ideo1 after its missing values had already
* been recoded to 0, so it was 0 for every respondent. It is now derived from
* the raw scale ideo, which still carries the missing values.
gen ideo1_bi=(ideo==.)
tab ideo1_bi

* ideo1_miss: missing-ideology flag used in the models
gen ideo1_miss=0
replace ideo1_miss=1 if ideo==.
tab ideo1_miss


***POLITICAL INTEREST***

*QB1 Interested in Politics
* interest: (4 - QB1) / 3 for QB1 codes up to 4; all other cases are set to 0
* and flagged by the missing indicator.
gen interest = (4-QB1)/3 if QB1 <=4

* FIX: the missing flag must be taken BEFORE interest is recoded to 0.
* Previously it was computed afterwards and was therefore 0 for everyone.
gen interest_missing = (interest == .)
* The Table B.6 loop refers to this flag as `var'_miss, i.e. interest_miss;
* provide that exact name so the reference does not depend on variable-name
* abbreviation. interest_missing is kept for backward compatibility.
gen interest_miss = interest_missing

recode interest (.=0)

*QC1 Registered to vote
* vote_registered: 1 when QC1 is 1, 0 in every other case
generate vote_registered = (QC1 == 1)
* "yes" is a shared Yes/No value label reused by later indicators
label define yes 1 "Yes" 0 "No"
label values vote_registered yes

* vote_registered_miss: 1 when QC1 is neither 1 nor 2
generate vote_registered_miss = !inlist(QC1, 1, 2)

* Seek Political News from Newspapers, Internet, Radio and/or TV (at least one)
* pol_news: 1 if any of the four source items is 1; otherwise 0 if at least
* one of them is 2; otherwise missing (flagged, then set to 0 below)
generate pol_news = .
foreach src in QB2 QB2AA QB3 QB4 {
    replace pol_news = 1 if `src' == 1
    replace pol_news = 0 if `src' == 2 & pol_news == .
}
label values pol_news yes

generate pol_news_miss = (pol_news == .)
replace pol_news = 0 if pol_news == .

* Seek Political News from Newspapers, Internet, Radio and/or TV (additive)
* pol_news1-pol_news4: one 1/0 indicator per source item (1 -> 1, 2 -> 0, else missing)
local k = 0
foreach src in QB2 QB2AA QB3 QB4 {
    local ++k
    generate pol_news`k' = 1 if `src' == 1
    replace  pol_news`k' = 0 if `src' == 2
}
label values pol_news* yes

* pol_news_sum  : row total that is missing when all four indicators are missing
* pol_news_sum2 : row total where an all-missing row counts as 0
* pol_news_sum3 : pol_news_sum2 divided by the number of sources (4)
egen pol_news_sum  = rowtotal(pol_news1 pol_news2 pol_news3 pol_news4), missing
egen pol_news_sum2 = rowtotal(pol_news1 pol_news2 pol_news3 pol_news4)
generate pol_news_sum3 = pol_news_sum2 / 4

generate pol_news_sum3_miss = (pol_news_sum == .)

* QC2 Likelihood of Voting
* vote_likelihood : QC2 codes 1-4 copied over, anything else missing
* vote_likelihood2: reversed and rescaled, (4 - code) / 3
generate vote_likelihood = QC2 if inlist(QC2, 1, 2, 3, 4)
label define vote 1 "Absolutely certain" 2 "Will probably" 3 "Chance is 50-50" 4 "Less than that"
generate vote_likelihood2 = (4 - vote_likelihood) / 3
label values vote_likelihood vote

* Flag the missing cases, then set them to 0 in the substantive variables
generate vote_likelihood_miss = (vote_likelihood == .)
replace vote_likelihood = 0 if vote_likelihood == .

generate vote_likelihood2_miss = (vote_likelihood2 == .)
replace vote_likelihood2 = 0 if vote_likelihood2 == .

* QC10 Vote state/primary (1 -> 1, 2 -> 0, else missing; then flagged and zeroed)
generate state_vote = cond(QC10 == 1, 1, cond(QC10 == 2, 0, .))
label values state_vote yes

generate state_vote_miss = (state_vote == .)
replace state_vote = 0 if state_vote == .

* QC7 2004 Pres (1 -> 1, 2 -> 0, else missing; then flagged and zeroed)
generate pres_vote = cond(QC7 == 1, 1, cond(QC7 == 2, 0, .))
label values pres_vote yes

generate pres_vote_miss = (pres_vote == .)
replace pres_vote = 0 if pres_vote == .


***POLITICAL ACTION***

*Binary measure of any action
* pol_act: 1 if any of QC15_1-4, QC15_6, QC15_7 is 1; otherwise 0 if at least
* one of them is 2; otherwise missing (flagged, then set to 0 below)
generate pol_act = .
foreach n of numlist 1 2 3 4 6 7 {
    replace pol_act = 1 if QC15_`n' == 1
    replace pol_act = 0 if QC15_`n' == 2 & pol_act == .
}

label values pol_act yes

tabulate pol_act

generate pol_act_miss = (pol_act == .)
replace pol_act = 0 if pol_act == .

*Additive measure of political action
* One 1/0 indicator per item (1 -> 1, 2 -> 0, else missing).
* QC15_1 is not part of the additive measure.
foreach n of numlist 2 3 4 6 7 {
    generate pol_act`n' = 1 if QC15_`n' == 1
    replace  pol_act`n' = 0 if QC15_`n' == 2
    tabulate pol_act`n'
}

label values pol_act* yes

* pol_act_sum_alt  : row total that is missing when all five indicators are missing
* pol_act_sum_alt2 : row total where an all-missing row counts as 0
* pol_act_sum_alt3 : pol_act_sum_alt2 divided by the number of items (5)
egen pol_act_sum_alt  = rowtotal(pol_act2 pol_act3 pol_act4 pol_act6 pol_act7), missing
egen pol_act_sum_alt2 = rowtotal(pol_act2 pol_act3 pol_act4 pol_act6 pol_act7)
generate pol_act_sum_alt3 = pol_act_sum_alt2 / 5
tabulate pol_act_sum_alt3

generate pol_act_sum_alt3_miss = (pol_act_sum_alt == .)



********************************************************************************************************
********************************************************************************************************
****                                            ANALYSIS                                            **** 
********************************************************************************************************
********************************************************************************************************

** Row Labels
* Variable labels picked up by esttab's "label" option in the tables below
* Key predictor
label variable victim   "Social Exclusion"
* Demographics
label variable female   "Female"
label variable age      "Age"
label variable age2_new "Age$^2$"
label variable inc      "Income"
label variable ed       "Education"
* Background
label variable cohort   "Percentage of Life Spent in US"
label variable citizen  "US Citizen"
label variable relig1   "Religious"
label variable ideo1    "Ideology"


** Summary Statistics in Observational Study Text + Table B.1
* Figures quoted in the text of the paper
count
summarize usborn
summarize inc2
tabulate QD1
tabulate QA1
* Chinese + Taiwanese (percentages typed in by hand)
display 24.19 + 1.96
* S. Asian + Indian + Bangladeshi + Maldivian + Pakistani + Sri Lankan
display 0.97+21.24+0.08+0.06+0.02+0.02
tabulate pidcat
tabulate victimsum
* Remainder after subtracting three hand-typed percentages
display 100-64.88-18.28-9.59

* Table B.1 + text of paper
* pid2 puts pid back on a 1-6 scale; inc3* and ed3* are category dummies
generate pid2 = (pid * 5) + 1
tabulate inc2, generate(inc3)
tabulate ed2, generate(ed3)
summarize pid2 dem victim female inc3* ed3* ageyears cohort relig1 citizen ideo


** Table 1 (Main)
* OLS of party identification on social exclusion, for pid (missing dropped)
* and pidx (missing at .5), each without and with the ideology controls
set more off
local rhs_base  victim victim_miss female fem_miss inc inc_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen
local rhs_ideol ideo1 ideo1_miss

foreach dv in pid pidx {
    eststo: regress `dv' `rhs_base'
    eststo: regress `dv' `rhs_base' `rhs_ideol'
}
esttab using output_tab1.tex, ar2(2) b(2) se(2) starlevels(* 0.1 ** .02 *** .002) label replace
eststo clear


** Table B.2 (Multinomial Logit)
* Three-category party variable with category 1 as the base outcome
set more off
eststo: mlogit pidcat `rhs_base', baseoutcome(1)
eststo: mlogit pidcat `rhs_base' `rhs_ideol', baseoutcome(1)
esttab using output_tabB2.tex, pr2(2) b(2) se(2) starlevels(* 0.1 ** .02 *** .002) label replace
eststo clear


** TABLE B.3 (Imputation)
* Multiple imputation of pid by predictive mean matching, followed by the
* Table 1 regressions on the imputed data -- once without and once with the
* ideology controls.
*
* Changes relative to the earlier version of this section:
*  - Each specification now runs on its own copy of the data (preserve/restore).
*    Before, the second specification re-issued "mi set" on data that was
*    already mi set and called add(45) again, which stacks 45 further
*    imputations on top of the 45 created by the first model, so the second
*    estimate pooled imputations from two different imputation models.
*  - The imputation model uses age2_new, the squared-age term that is
*    registered and used in the analysis model, instead of age2.
*  - rseed() makes the imputations reproducible. The seed value is arbitrary;
*    it is NOT the random state behind any previously reported table.
*  - knn(1) is written out explicitly.
*    NOTE(review): 1 is believed to be the default used by releases in which
*    knn() is optional, and newer releases are believed to require the option;
*    confirm against the mi impute pmm documentation for your Stata version.
set more off

* --- Specification without ideology ---
preserve
mi set wide
mi register imputed pid
mi register regular female fem_miss inc inc_miss victim victim_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen
mi describe
mi impute chained (pmm, knn(1)) pid = female fem_miss inc inc_miss victim victim_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen, add(45) rseed(20160509)
mi estimate: reg pid victim victim_miss female fem_miss inc inc_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen
restore

* --- Specification with ideology ---
preserve
mi set wide
mi register imputed pid
mi register regular female fem_miss inc inc_miss victim victim_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen ideo1 ideo1_miss
mi describe
mi impute chained (pmm, knn(1)) pid = female fem_miss inc inc_miss victim victim_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen ideo1 ideo1_miss, add(45) rseed(20160509)
mi estimate: reg pid victim victim_miss female fem_miss inc inc_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen ideo1 ideo1_miss
restore


** Table B.4 (Logit)
* Logit of the binary Democrat indicator, without and with ideology controls;
* mfx prints marginal effects after each fit
set more off
local b4_rhs victim victim_miss female fem_miss inc inc_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen

eststo: logit dem `b4_rhs', asis
mfx
eststo: logit dem `b4_rhs' ideo1 ideo1_miss, asis
mfx
esttab using output_tabB4.tex, pr2(2) b(2) se(2) starlevels(* 0.1 ** .02 *** .002) label replace
eststo clear


** TABLE B.5 (Interaction between Income and Key Predictors)
* incxvictim: product of rescaled income and the social-exclusion indicator
generate incxvictim = inc * victim

set more off
local b5_key  victim victim_miss incxvictim
local b5_ctrl female fem_miss inc inc_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen

* Three models per outcome: interaction only, plus controls, plus ideology
foreach dv in pid pidx {
    eststo: regress `dv' `b5_key' inc
    eststo: regress `dv' `b5_key' `b5_ctrl'
    eststo: regress `dv' `b5_key' `b5_ctrl' ideo1 ideo1_miss
}
esttab using output_tabB5.tex, ar2(2) b(2) se(2) starlevels(* 0.1 ** .02 *** .002) label replace
eststo clear


** Table B.6 (Interaction between Political Interest and Key Predictors)
local b6_mods vote_registered vote_likelihood2 state_vote pres_vote pol_news_sum3 pol_act_sum_alt3 interest
local b6_ctrl female fem_miss inc inc_miss ed ed_miss age age2_new age_miss cohort cohort_miss relig1 citizen

* Interaction of each engagement measure with the social-exclusion indicator
foreach m of local b6_mods {
    generate `m'xvictim = `m' * victim
}

* Two models per measure: interaction only, then with the control set.
* `m'_miss has to resolve to an existing variable for every measure (for
* interest this may rely on Stata's variable-name abbreviation).
set more off
foreach m of local b6_mods {
    local b6_core victim victim_miss `m'xvictim `m' `m'_miss
    eststo: regress pidx `b6_core'
    eststo: regress pidx `b6_core' `b6_ctrl'
}

esttab using output_tabB6.tex, ar2(2) b(2) se(2) starlevels(* 0.1 ** .02 *** .002) label replace
eststo clear
